We take the kobo data that has one record per subject and we join with the foodbook_daily_intakes dataset which has one record per subject per recall, group code etc…

library(dduh.ds)
library(tidyverse)  # data manipulation
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
config <- dduh.ds::load_config(config_file = "../../configs/ea_data.yaml")

koboData <- readRDS(config$data_repositories$kobo)
foodbookDailyIntakes <- readRDS(config$data_repositories$foodbook_daily_intakes) |>
  mutate(MealShort=if_else(str_ends(Meal, 'SNACK'), 'SNACK', 'MEAL' )) 


combinedProfiles <-
  koboData |> select(
    participant_id,
    sport_remap,
    gender,
    frequency_nondiet_drinks,
    count_icdas_score_2_3,
    count_icdas_score_2_4,
    count_icdas_score_2_5,
    count_icdas_score_2_6,
  ) |>
  mutate(
    combined_icdas_score_2 =
      count_icdas_score_2_3 +
      count_icdas_score_2_4 +
      count_icdas_score_2_5 +
      count_icdas_score_2_6
  ) |>
inner_join(foodbookDailyIntakes,
           by = join_by(participant_id == User),
           keep = TRUE) |>
  filter(!is.na(User)) |>
  select(-User)

Redoing this analysis for all recalls and overall descriptives food for paper 2, added Fat, protein. ALso had prev only had ‘dental nutrients’ so energy intake eg low, included all nutrients now

dentalNutStats <- function(fb24Daily, groupCols){
  dentalFoodGroups <- read_csv("food-group-dental-analysis.csv")
  dentalNutrients <- c("ENERGY (KCAL)", "FAT", "PROTEIN", "CHO", "SUGARS", "STARCH")

fbSubjectIntakeLong <- 
  fb24Daily |> 
  left_join(dentalFoodGroups, by = join_by(GroupCodeShort)) |>
  mutate(
    GroupCodeShort = if_else(is.na(GroupCodeShortId), "Others", GroupCodeShort),
    GroupCodeShortIsCariogenic = case_when(
      is.na(IsCariogenic) ~ "NonCariogenic",
      GroupCodeShort == "Beverages Water" ~ "Water",
      IsCariogenic ~ "Cariogenic", 
    )
  ) |>
  mutate(MealShort = if_else(str_ends(Meal, 'SNACK'), 'SNACK', 'MEAL')) |>
  select(
    participant_id,
    consumption, 
    all_of(dentalNutrients),
    all_of(groupCols),
   `Recall #`,
   GroupCodeShort,
   IsCariogenic,
  ) |>
  pivot_longer(cols=dentalNutrients, names_to = "Nutrient", values_to = "intake") |>
  group_by(across(all_of(c("participant_id", "Nutrient", groupCols)))) |>
  reframe(total_consumptions=sum(consumption),
          max_daily_consumptions=max(consumption),
          mean_intake = mean(intake),
          max_intake = max(intake),
          sum_intake = sum(intake),
          recalls=n_distinct(`Recall #`)
          ) |>
  mutate(mean_daily_intake = sum_intake / recalls)

fbSubjectIntakeLong
}
nutrients <-  c("ENERGY (KCAL)",
                "FAT",
                "PROTEIN",
                "CHO",
                "SUGARS",
                "STARCH")

nutrientsByGender <- dentalNutStats(combinedProfiles, c("gender")) 
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(dentalNutrients)
## 
##   # Now:
##   data %>% select(all_of(dentalNutrients))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
nutrientsByGenderPivot <- nutrientsByGender |>
  select(participant_id, gender, Nutrient, mean_daily_intake) |>
  pivot_longer(cols = c("mean_daily_intake"), names_to = "statistic")

p <- ggplot(nutrientsByGenderPivot) +
  geom_boxplot(aes(y=value, x=statistic, fill=gender)) +
  facet_wrap(Nutrient~., scales = "free_y") +
  ggtitle("Nutrients daily intake")
print(p)

meanNutrientByGender <- nutrientsByGender |> 
  group_by(gender, Nutrient) |>
  reframe(
    daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
    sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
  ) |>
  pivot_wider(names_from = gender, values_from = c(daily_mean_intake, sd_daily_intake))

meanNutrient <- nutrientsByGender |> 
  group_by(Nutrient) |>
  reframe(
    daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
    sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
  )


knitr::kable(meanNutrient, caption = "Overall daily mean intake", digits = 2)
Overall daily mean intake
Nutrient daily_mean_intake sd_daily_intake
CHO 323.94 188.86
ENERGY (KCAL) 2677.71 1352.26
FAT 98.38 52.12
PROTEIN 139.19 69.51
STARCH 168.75 90.31
SUGARS 127.78 93.55
knitr::kable(meanNutrientByGender, caption = "Daily mean intake by gender", digits = 2)
Daily mean intake by gender
Nutrient daily_mean_intake_Female daily_mean_intake_Male sd_daily_intake_Female sd_daily_intake_Male
CHO 301.99 336.78 151.80 207.80
ENERGY (KCAL) 2407.42 2835.81 1085.34 1473.13
FAT 86.46 105.35 41.46 56.66
PROTEIN 119.72 150.59 56.62 74.19
STARCH 153.50 177.67 64.36 102.03
SUGARS 121.47 131.48 71.70 104.71

By meal type and cariogenic level

nutrients <-  c("ENERGY (KCAL)",
                "CHO",
                "SUGARS",
                "STARCH")

nutrientsByFoodProfile <- 
  dentalNutStats(combinedProfiles, c("MealShort", "GroupCodeShortIsCariogenic")) |>
  filter(GroupCodeShortIsCariogenic != "Water") |>
  mutate()
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nutrientsByFoodProfilePivot <- nutrientsByFoodProfile |>
  filter(Nutrient %in% nutrients) |>
  select(participant_id, Nutrient, mean_daily_intake, MealShort, GroupCodeShortIsCariogenic) |>
  pivot_longer(cols = c("mean_daily_intake"), names_to = "statistic") 

p <- ggplot(nutrientsByFoodProfilePivot) +
  geom_boxplot(aes(y=value, x=GroupCodeShortIsCariogenic, fill=MealShort)) +
  facet_wrap(Nutrient~., scales = "free_y") +
  xlab("meal type and cariogenic classification") +
  ylab("mean daily intake (g)")
print(p)

meanNutrientByFoodProfile <- nutrientsByFoodProfile |> 
  group_by(Nutrient, GroupCodeShortIsCariogenic, MealShort) |>
  reframe(
    daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
    sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
  )


knitr::kable(meanNutrientByFoodProfile, caption = "Daily mean intake by meal type and cariogenic", digits = 2)
Daily mean intake by meal type and cariogenic
Nutrient GroupCodeShortIsCariogenic MealShort daily_mean_intake sd_daily_intake
CHO Cariogenic MEAL 86.15 69.47
CHO Cariogenic SNACK 73.81 78.88
CHO NonCariogenic MEAL 129.10 72.20
CHO NonCariogenic SNACK 30.14 32.88
ENERGY (KCAL) Cariogenic MEAL 542.93 423.83
ENERGY (KCAL) Cariogenic SNACK 488.85 449.33
ENERGY (KCAL) NonCariogenic MEAL 1392.47 605.84
ENERGY (KCAL) NonCariogenic SNACK 298.62 301.01
FAT Cariogenic MEAL 13.69 12.88
FAT Cariogenic SNACK 16.22 15.19
FAT NonCariogenic MEAL 61.11 31.04
FAT NonCariogenic SNACK 12.71 14.95
PROTEIN Cariogenic MEAL 24.65 25.66
PROTEIN Cariogenic SNACK 14.56 25.45
PROTEIN NonCariogenic MEAL 87.74 37.73
PROTEIN NonCariogenic SNACK 15.28 17.03
STARCH Cariogenic MEAL 27.77 23.04
STARCH Cariogenic SNACK 18.61 18.69
STARCH NonCariogenic MEAL 90.20 60.08
STARCH NonCariogenic SNACK 11.11 24.54
SUGARS Cariogenic MEAL 48.47 48.60
SUGARS Cariogenic SNACK 45.34 51.38
SUGARS NonCariogenic MEAL 30.90 25.45
SUGARS NonCariogenic SNACK 18.03 19.08

By Group Short code

nutrients <-  c("ENERGY (KCAL)",
                "CHO",
                "SUGARS",
                "STARCH")

nutrientsByFoodProfile <- 
  dentalNutStats(combinedProfiles, c("GroupCodeShort")) 
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nutrientsByFoodProfilePivot <- nutrientsByFoodProfile |>
  filter(Nutrient %in% nutrients) |>
  select(participant_id, Nutrient, mean_daily_intake, GroupCodeShort) |>
  pivot_longer(cols = c("mean_daily_intake"), names_to = "statistic") 

p <- ggplot(nutrientsByFoodProfilePivot) +
  geom_boxplot(aes(y=value, x=statistic, fill=GroupCodeShort)) +
  facet_wrap(Nutrient~., scales = "free_y") +
  xlab("mean daily intake") +
  ggtitle("Nutrients daily intake by food group")
print(p)

meanNutrientByGroupCodeShort <- nutrientsByFoodProfile |> 
  group_by(Nutrient, GroupCodeShort) |>
  reframe(
    daily_mean_intake = mean(mean_daily_intake, na.rm = TRUE),
    sd_daily_intake = sd(mean_daily_intake, na.rm = TRUE)
  ) |>
  group_by(Nutrient) |>
  arrange(desc(daily_mean_intake), .by_group=TRUE) |>
  top_n(5, daily_mean_intake)



knitr::kable(meanNutrientByGroupCodeShort, caption = "Contributors to each nutrient", digits = 2)
Contributors to each nutrient
Nutrient GroupCodeShort daily_mean_intake sd_daily_intake
CHO Others 150.23 89.28
CHO Breads, Rolls and Scones 58.58 29.63
CHO Fruit drinks and others 52.97 39.07
CHO Beverages Sugar-sweetened 44.95 53.08
CHO Breakfast cereals 44.73 23.06
ENERGY (KCAL) Others 1603.18 750.13
ENERGY (KCAL) Dietary Supplements 327.38 379.83
ENERGY (KCAL) Breads, Rolls and Scones 302.24 148.29
ENERGY (KCAL) Biscuits, Cakes and Savouries 294.44 243.10
ENERGY (KCAL) Breakfast cereals 290.48 143.30
FAT Others 70.11 35.93
FAT Sugars, Confectionary and Chocolate Confectionary 14.73 5.98
FAT Biscuits, Cakes and Savouries 14.62 13.45
FAT Creams, Ice-creams and Desserts 10.16 10.02
FAT Sugars, Confectionary and Crisps and savoury Snacks 9.25 3.69
PROTEIN Others 98.61 45.04
PROTEIN Dietary Supplements 34.58 33.49
PROTEIN Breads, Rolls and Scones 10.34 4.94
PROTEIN Breakfast cereals 9.80 5.44
PROTEIN Yoghurt & yoghurt drinks 9.18 6.68
STARCH Others 98.44 72.89
STARCH Breads, Rolls and Scones 52.84 26.74
STARCH Breakfast cereals 34.52 21.36
STARCH Sugars, Confectionary and Crisps and savoury Snacks 20.73 8.54
STARCH Biscuits, Cakes and Savouries 17.93 13.92
SUGARS Fruit drinks and others 52.13 38.48
SUGARS Others 43.17 33.97
SUGARS Beverages Sugar-sweetened 40.21 48.44
SUGARS Sugars, Confectionary and Chocolate Confectionary 26.85 11.05
SUGARS Creams, Ice-creams and Desserts 20.79 20.51

Analysis by sport

combinedProfiles <-
  koboData |> select(
    participant_id,
    sport_remap,
    gender,
    frequency_nondiet_drinks,
    count_icdas_score_2_3,
    count_icdas_score_2_4,
    count_icdas_score_2_5,
    count_icdas_score_2_6,
  ) |>
  mutate(
    combined_icdas_score_2 =     count_icdas_score_2_3 +
      count_icdas_score_2_4 +
      count_icdas_score_2_5 +
      count_icdas_score_2_6
  ) |>
inner_join(foodbookDailyIntakes,
           by = join_by(participant_id == User),
           keep = TRUE) |>
  filter(!is.na(User)) |>
  select(-User) |>
  filter(!sport_remap %in% c('Gymnast', 'Triathelete'))
ggplot(koboData) +
  geom_bar(aes(x=sport_remap))

p <- combinedProfiles |> select(
    participant_id,
    sport_remap)
print(p)
## # A tibble: 2,783 × 2
##    participant_id sport_remap
##    <chr>          <chr>      
##  1 aa944ab0       Cyclist    
##  2 aa944ab0       Cyclist    
##  3 aa944ab0       Cyclist    
##  4 aa944ab0       Cyclist    
##  5 aa944ab0       Cyclist    
##  6 aa944ab0       Cyclist    
##  7 aa944ab0       Cyclist    
##  8 aa944ab0       Cyclist    
##  9 aa944ab0       Cyclist    
## 10 aa944ab0       Cyclist    
## # ℹ 2,773 more rows

For each nutrients we plot the distribution by a specific facet / dimension

nutrients <-  c("ENERGY (KCAL)",
                "CHO",
                "SUGARS",
                "STARCH")

nutrientsBySportMealGender <- dentalNutStats(combinedProfiles, c("sport_remap", "MealShort", "gender")) |> 
  pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
               names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){ 
p <- ggplot(nutrientsBySportMealGender |> filter(Nutrient == nut )) +
  geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
  facet_wrap(MealShort+gender~.) +
  ggtitle(paste(nut, " mean_intake"))
print(p)
}

nutrientsBySportGender <- dentalNutStats(combinedProfiles, c("sport_remap",  "gender")) |> 
  pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
               names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){ 
p <- ggplot(nutrientsBySportGender |> filter(Nutrient == nut )) +
  geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
  facet_wrap(gender~.) +
  ggtitle(paste(nut, " mean_intake"))
print(p)
}

nutrientsBySportGenderGroupCodeShort <- dentalNutStats(combinedProfiles, c("sport_remap",  "gender", "GroupCodeShort")) |> 
  pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
               names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){ 
p <- ggplot(nutrientsBySportGenderGroupCodeShort |> filter(Nutrient == nut )) +
  geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
  facet_wrap(GroupCodeShort+gender~.) +
  ggtitle(paste(nut, " mean_intake"))
print(p)
}

nutrientsBySportGenderICDAS <- dentalNutStats(combinedProfiles, c("sport_remap",  "gender", "combined_icdas_score_2")) |> 
  pivot_longer(cols = c("mean_intake", "max_intake", "mean_daily_intake"),
               names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){ 
p <- ggplot(nutrientsBySportGenderICDAS |> filter(Nutrient == nut )) +
  geom_point(aes(y=value, x=combined_icdas_score_2, color=sport_remap)) +
  facet_wrap(statistic+gender~.) +
  ggtitle(paste(nut, " mean_intake"))
print(p)
}

nutrientsBySportGender <- dentalNutStats(combinedProfiles, c("sport_remap",  "gender")) |> 
  pivot_longer(cols = c("mean_daily_intake"),
               names_to = "statistic")
## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GroupCodeShort
## dbl (1): GroupCodeShortId
## lgl (1): IsCariogenic
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
for (nut in nutrients){ 
p <- ggplot(nutrientsBySportGender |> filter(Nutrient == nut )) +
  geom_boxplot(aes(y=value, x=statistic, fill=sport_remap)) +
  facet_wrap(gender~.) +
  ggtitle(paste(nut, "mean_daily_intake"))
print(p)
}